

/*
clean intraday auction shock data: govpx, cme, and taq data
input data comes from proprietary sources

output:

govpx:
- govpx_shock_all.dta: shocks of all traded securities around all auctions
- govpx_shock.dta: shocks of only securities of given auction
-- (when-issued or existing trades, if a reopening)
- govpx_nonauction_shock_all.dta: pseudo-shocks on non-auction dates

cme:
- cme_shock_all.dta: shocks of all futures of given auction
- cme_shock_cls.dta: wide shocks of "closest" future of given auction
-- (based on time to contract delivery)

taq:
- taq_shock_all.dta: shocks of ETFs around given auction
- taq_shock.dta: wide shocks of ETFs around given auction
*/



********************************************************************************
********************************************************************************
* govpx
noisily display "cleaning govpx"

******************************
* auction-window yield shocks from govpx, for every traded security
* yield source switches on the calendar year of the observation:
*   bid yield (bidyld) before the cutoff year,
*   indicative bid yield (indbyld) from the cutoff year onward
local ind_year_cutoff 1999

* ---- pre-auction window ----
noisily display "loading govpx pre"
use ../data/input_restricted/govpx_auction_pre.dta, clear
generate yld = cond(year(dofc(date)) < `ind_year_cutoff', bidyld, indbyld)
drop bidyld askyld indbyld indayld

* retain only the latest time stamp within each security/auction/date cell
tempvar t_last
bysort cusip when_issued date auction_cusip (time): ///
	egen double `t_last' = max(time)
keep if time == `t_last'
drop `t_last'

* one row per cell: data_type rows (median, count) become columns
reshape wide yld, ///
	i(cusip when_issued date time auction_cusip close_time release_time) ///
	j(data_type) string
rename (yldmedian yldcount time) (yld_pre N_obs_pre time_pre)

sort close_time auction_cusip cusip when_issued
tempfile govpx_pre
save `govpx_pre', replace


* ---- post-auction window ----
noisily display "loading govpx post"
use ../data/input_restricted/govpx_auction_post.dta, clear
generate yld = cond(year(dofc(date)) < `ind_year_cutoff', bidyld, indbyld)
drop bidyld askyld indbyld indayld

* retain only the earliest time stamp within each security/auction/date cell
tempvar t_first
bysort cusip when_issued date auction_cusip (time): ///
	egen double `t_first' = min(time)
keep if time == `t_first'
drop `t_first'

* one row per cell: data_type rows (median, count) become columns
reshape wide yld, ///
	i(cusip when_issued date time auction_cusip close_time release_time) ///
	j(data_type) string
rename (yldmedian yldcount time) (yld_post N_obs_post time_post)

* attach the pre-window values; cells lacking either side are discarded
sort close_time auction_cusip cusip when_issued
merge 1:1 cusip when_issued date auction_cusip close_time release_time ///
	using `govpx_pre', keep(match) nogenerate

* ---- shock construction ----
sort close_time auction_cusip cusip when_issued
* post-minus-pre yield change, scaled by 100 (basis points)
generate D_shock = 100*(yld_post - yld_pre)
keep if !missing(D_shock)
drop yld_post yld_pre
* convert the clock-valued date into a daily stata date
replace date = dofc(date)
format date %td
* write out
save ../data/input/govpx_shock_all.dta, replace


******************************
* auction-level govpx shocks
* one observation per auction: the shock of the auctioned cusip itself
use if cusip==auction_cusip using ../data/input/govpx_shock_all.dta, clear
drop cusip

* reopenings can carry both a spot and a when-issued row for the same
* date/auction; when both rows are present keep the spot row and discard
* the when-issued one (a lone when-issued row is left in place)
bysort date auction_cusip (when_issued): ///
	drop if (_N==2) & (when_issued==1)

* write out
save ../data/input/govpx_shock.dta, replace



********************************************************************************
********************************************************************************
* cme
noisily display "cleaning cme"

******************************
* auction-window price shocks for every futures contract in the cme files

* ---- pre-auction window ----
noisily display "loading cme pre"
use ../data/input_restricted/cme_auction_pre.dta, clear

* retain only the latest time stamp within each contract/auction/date cell
tempvar t_last
bysort ticker contract_year contract_month date auction_cusip (time): ///
	egen double `t_last' = max(time)
keep if time == `t_last'
drop `t_last'

* one row per cell: data_type rows (median, count) become columns
reshape wide price, ///
	i(ticker contract_year contract_month date time auction_cusip close_time release_time) ///
	j(data_type) string
rename (pricemedian pricecount time) (price_pre N_obs_pre time_pre)

sort close_time auction_cusip ticker contract_year contract_month
tempfile cme_pre
save `cme_pre', replace


* ---- post-auction window ----
noisily display "loading cme post"
use ../data/input_restricted/cme_auction_post.dta, clear

* retain only the earliest time stamp within each contract/auction/date cell
tempvar t_first
bysort ticker contract_year contract_month date auction_cusip (time): ///
	egen double `t_first' = min(time)
keep if time == `t_first'
drop `t_first'

* one row per cell: data_type rows (median, count) become columns
reshape wide price, ///
	i(ticker contract_year contract_month date time auction_cusip close_time release_time) ///
	j(data_type) string
rename (pricemedian pricecount time) (price_post N_obs_post time_post)

* attach the pre-window values; cells lacking either side are discarded
sort close_time auction_cusip ticker contract_year contract_month
merge 1:1 ticker contract_year contract_month date auction_cusip close_time release_time ///
	using `cme_pre', keep(match) nogenerate

* ---- shock construction ----
sort close_time auction_cusip ticker contract_year contract_month
* 100 x log price change from the pre window to the post window
generate D_shock = 100*log(price_post / price_pre)
keep if !missing(D_shock)
drop price_post price_pre
* convert the clock-valued date into a daily stata date
replace date = dofc(date)
format date %td
* write out
save ../data/input/cme_shock_all.dta, replace


******************************

* auction shock data
* closest securities only: for each auction date keep the contract whose
* delivery year/month is the one assigned below, then reshape to one row
* per auction with one standardized shock column per ticker
use ../data/input/cme_shock_all.dta, clear

* match dates to contract year/months
* the assigned delivery month is the quarterly month (3, 6, 9, 12) that
* follows the auction month's group:
*   dec, jan, feb -> 3     mar, apr, may -> 6
*   jun, jul, aug -> 9     sep, oct, nov -> 12
* december auctions map to the march contract of the following year
gen _year = year(date) + (month(date)==12)
gen _month = 3 if inlist(month(date), 12, 1, 2)
replace _month = 6 if inlist(month(date), 3, 4, 5)
replace _month = 9 if inlist(month(date), 6, 7, 8)
replace _month = 12 if inlist(month(date), 9, 10, 11)
keep if contract_year==_year & contract_month==_month

* wide data: one D_shock column per ticker
keep ticker date auction_cusip close_time release_time D_shock
reshape wide D_shock, string j(ticker) ///
	i(date auction_cusip close_time release_time)

* normalize each ticker's shock to mean zero and unit standard deviation
* r(mean) and r(sd) are read directly rather than through macro expansion,
* so the stored results enter the expression at full double precision and
* an empty result cannot turn the expression into a syntax error
foreach var of varlist D_shock* {
	summarize `var'
	replace `var' = (`var' - r(mean)) / r(sd)
}
* save
save ../data/input/cme_shock_cls.dta, replace



********************************************************************************
********************************************************************************
* taq
noisily display "cleaning taq"

******************************
* auction-window price shocks for every ticker in the taq files

* ---- pre-auction window ----
noisily display "loading taq pre"
use ../data/input_restricted/taq_auction_pre.dta, clear
drop volume

* retain only the latest time stamp within each ticker/auction/date cell
tempvar t_last
bysort ticker date auction_cusip (time): egen double `t_last' = max(time)
keep if time == `t_last'
drop `t_last'

* one row per cell: data_type rows (median, count) become columns
reshape wide price, ///
	i(ticker date time auction_cusip close_time release_time) ///
	j(data_type) string
rename (pricemedian pricecount time) (price_pre N_obs_pre time_pre)

sort close_time auction_cusip ticker
tempfile taq_pre
save `taq_pre', replace


* ---- post-auction window ----
noisily display "loading taq post"
use ../data/input_restricted/taq_auction_post.dta, clear
drop volume

* retain only the earliest time stamp within each ticker/auction/date cell
tempvar t_first
bysort ticker date auction_cusip (time): egen double `t_first' = min(time)
keep if time == `t_first'
drop `t_first'

* one row per cell: data_type rows (median, count) become columns
reshape wide price, ///
	i(ticker date time auction_cusip close_time release_time) ///
	j(data_type) string
rename (pricemedian pricecount time) (price_post N_obs_post time_post)

* attach the pre-window values; cells lacking either side are discarded
sort close_time auction_cusip ticker
merge 1:1 ticker date auction_cusip close_time release_time ///
	using `taq_pre', keep(match) nogenerate

* ---- shock construction ----
sort close_time auction_cusip ticker
* 100 x log price change from the pre window to the post window
generate D_shock = 100*log(price_post / price_pre)
keep if !missing(D_shock)
drop price_post price_pre
* convert the clock-valued date into a daily stata date
replace date = dofc(date)
format date %td
* write out
save ../data/input/taq_shock_all.dta, replace

******************************
* "wide" auction taq shocks
* load only the auction identifiers and the shock, then spread the tickers
* across columns so each auction occupies a single row
use ticker date auction_cusip close_time release_time D_shock ///
	using ../data/input/taq_shock_all.dta, clear

reshape wide D_shock, ///
	i(date auction_cusip close_time release_time) ///
	j(ticker) string
* write out
save ../data/input/taq_shock.dta, replace



********************************************************************************
********************************************************************************
* non auction shocks
noisily display "cleaning govpx, non-auction dates"

******************************
* pseudo-shocks from govpx on non-auction dates
* yield source switches on the calendar year of the observation:
*   bid yield (bidyld) before the cutoff year,
*   indicative bid yield (indbyld) from the cutoff year onward
local ind_year_cutoff 1999

* ---- pre window ----
noisily display "loading govpx pre (non-auction)"
use ../data/input_restricted/govpx_nonauction_pre.dta, clear
generate yld = cond(year(dofc(date)) < `ind_year_cutoff', bidyld, indbyld)
drop bidyld askyld indbyld indayld

* retain only the latest time stamp within each security/date cell
tempvar t_last
bysort cusip when_issued date (time): egen double `t_last' = max(time)
keep if time == `t_last'
drop `t_last'

* one row per cell: data_type rows (median, count) become columns
reshape wide yld, i(cusip when_issued date time) j(data_type) string
rename (yldmedian yldcount time) (yld_pre N_obs_pre time_pre)

sort date cusip when_issued
tempfile govpx_pre
save `govpx_pre', replace


* ---- post window ----
noisily display "loading govpx post (non-auction)"
use ../data/input_restricted/govpx_nonauction_post.dta, clear
generate yld = cond(year(dofc(date)) < `ind_year_cutoff', bidyld, indbyld)
drop bidyld askyld indbyld indayld

* retain only the earliest time stamp within each security/date cell
tempvar t_first
bysort cusip when_issued date (time): egen double `t_first' = min(time)
keep if time == `t_first'
drop `t_first'

* one row per cell: data_type rows (median, count) become columns
reshape wide yld, i(cusip when_issued date time) j(data_type) string
rename (yldmedian yldcount time) (yld_post N_obs_post time_post)

* attach the pre-window values; cells lacking either side are discarded
sort date cusip when_issued
merge 1:1 cusip when_issued date using `govpx_pre', keep(match) nogenerate

* ---- shock construction ----
sort date cusip when_issued
* post-minus-pre yield change, scaled by 100 (basis points)
generate D_shock = 100*(yld_post - yld_pre)
keep if !missing(D_shock)
drop yld_post yld_pre
* convert the clock-valued date into a daily stata date
replace date = dofc(date)
format date %td
* write out
save ../data/input/govpx_nonauction_shock_all.dta, replace




********************************************************************************
********************************************************************************
* create cleaned example intraday files
* for each example auction, trims the single-day govpx and cme files to the
* chosen security/contract and to a symmetric window around the auction
* close, and saves the result under ../data/input
n di "cleaning intraday example plots"
* 2 examples: 
* August 11, 2011 (auction: 912810QS0; newly issued 30Y)
* December 9, 2010 (auction: 912810QL5; re-opened 30Y)
* all lists below are positional: word i of each list belongs to example i
local datenums 20101209 20110811
* for govpx
local cusips 912810QL5 912810QS0
local when_issueds 0 1
* for cme
local contract_years 2011 2011
local contract_months 3 9
local tickers US US

* close time (milliseconds since midnight, as returned by hms())
local close_times `=hms(13,0,0)' `=hms(13,0,0)'

* release times from bloomberg
* (recorded for reference; not used in the cleaning below)
local release_times `=hms(13,1,46)' `=hms(13,1,38)'

* time tick options
* xwindow is the half-width of the window kept around the close time
* xstep is not used in the cleaning below
local xwindow `=hms(0,15,0)'
local xstep `=hms(0,5,0)'

forvalues i=1/2 {
	* pull out info for given auction
	local datenum: word `i' of `datenums'
	local close_time: word `i' of `close_times'
	local release_time: word `i' of `release_times'
	
	* create window variables
	local xmin = `close_time' - `xwindow'
	local xmax = `close_time' + `xwindow'
	
	n di `datenum'
	********
	* govpx
	n di "govpx"
	local cusip: word `i' of `cusips'
	local when_issued: word `i' of `when_issueds'
	
	use ../data/input_restricted/govpx_`datenum'.dta, clear
	keep if cusip=="`cusip'" & when_issued==`when_issued'
	sort time
	
	* easier time variable: offset of time from date
	* NOTE(review): assumes date holds midnight of the trading day as a
	* clock value, so the difference is time of day - confirm against input
	replace time = time - date 
	format time %tcHH:MM
	keep if time>=`xmin' & time<=`xmax'
	
	* the indicative bid yield is the yield series kept for the examples
	rename indbyld yld
	keep cusip when_issued date time yld
	
	* save
	save ../data/input/govpx_`datenum'_clean.dta, replace
	
	********
	* cme
	n di "cme"
	local contract_year: word `i' of `contract_years'
	local contract_month: word `i' of `contract_months'
	local ticker: word `i' of `tickers'
	
	use ../data/input_restricted/cme_`datenum'.dta, clear
	keep if ticker=="`ticker'" & ///
		contract_year==`contract_year' & contract_month==`contract_month'
	sort time
	
	* easier time variable: same offset and window as for govpx above
	replace time = time - date 
	format time %tcHH:MM
	keep if time>=`xmin' & time<=`xmax'
	
	* save
	save ../data/input/cme_`datenum'_clean.dta, replace
	
}



